{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6c96e1e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Process-level GPU settings, applied in one call:\n",
    "#   CUDA_VISIBLE_DEVICES      - expose only GPU index 2 to this kernel.\n",
    "#   TF_FORCE_GPU_ALLOW_GROWTH - TensorFlow memory-growth option (no TensorFlow\n",
    "#                               import is visible in this notebook).\n",
    "os.environ.update({\n",
    "    'CUDA_VISIBLE_DEVICES': '2',\n",
    "    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c013d091",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer\n",
    "from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM, PeftModel\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "34f8cc39",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tokenizer comes from the upstream instruct checkpoint; later cells use it for\n",
    "# the hub upload, the text streamer and the chat template.\n",
    "BASE_TOKENIZER_REPO = 'unsloth/Meta-Llama-3.1-8B-Instruct'\n",
    "tokenizer = AutoTokenizer.from_pretrained(BASE_TOKENIZER_REPO)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "272e2ca6",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7cb2d4b7e2a84cc7b841bb2fc08237c5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/mesolitica/malaysian-Llama-3.1-8B-Instruct-v2/commit/6ad47f4fd705accef4d2648775556bdda5a01390', commit_message='Upload tokenizer', commit_description='', oid='6ad47f4fd705accef4d2648775556bdda5a01390', pr_url=None, repo_url=RepoUrl('https://huggingface.co/mesolitica/malaysian-Llama-3.1-8B-Instruct-v2', endpoint='https://huggingface.co', repo_type='model', repo_id='mesolitica/malaysian-Llama-3.1-8B-Instruct-v2'), pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Upload the tokenizer files to the target model repository (private=True).\n",
    "# Kept as the last expression of the cell so the returned CommitInfo is shown.\n",
    "TARGET_HUB_REPO = 'mesolitica/malaysian-Llama-3.1-8B-Instruct-v2'\n",
    "tokenizer.push_to_hub(TARGET_HUB_REPO, private=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f6d2bdcb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prints decoded text to stdout while `generate` is running. `skip_prompt` is\n",
    "# left at its default (False), so the prompt itself is echoed as well.\n",
    "streamer = TextStreamer(tokenizer=tokenizer, skip_prompt=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "626a66b2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b5b3108b3ea64de8a6e129ac59124924",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2d62e19b4e92413db485876e2baae219",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0f647cdc70a84f03ae47eba4ee85f972",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "adapter_model.safetensors:   0%|          | 0.00/5.68G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# LoRA adapter checkpoint on the Hugging Face Hub, loaded in bfloat16.\n",
    "ADAPTER_REPO = 'mesolitica/lora-embedding-128-llama3.1-8b-multipack-v2'\n",
    "\n",
    "ori_model = AutoPeftModelForCausalLM.from_pretrained(\n",
    "    ADAPTER_REPO,\n",
    "    torch_dtype=torch.bfloat16,\n",
    ")\n",
    "\n",
    "# Move the weights to the GPU; the trailing `;` keeps the module repr out of\n",
    "# the cell output.\n",
    "ori_model.cuda();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "19492e54",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fold the LoRA weights into the base model and drop the PEFT wrapper.\n",
    "# The merged model no longer exposes `merge_and_unload`, so re-running this cell\n",
    "# used to raise AttributeError; the isinstance guard makes the cell idempotent.\n",
    "if isinstance(ori_model, PeftModel):\n",
    "    ori_model = ori_model.merge_and_unload()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "443a56bb",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/husein/.local/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:590: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.0` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
      "  warnings.warn(\n",
      "/home/husein/.local/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:595: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.95` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
      "\n",
      "Cutting Knowledge Date: December 2023\n",
      "Today Date: 26 Jul 2024\n",
      "\n",
      "<|eot_id|><|start_header_id|>user<|end_header_id|>\n",
      "\n",
      "camne nk selesaikan masalah hutang negara<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n",
      "\n",
      "Untuk menyelesaikan masalah hutang negara, terdapat beberapa langkah yang boleh diambil. Berikut adalah beberapa cadangan:\n",
      "\n",
      "1. Meningkatkan pendapatan negara: Salah satu cara untuk mengurangkan hutang negara adalah dengan meningkatkan pendapatan negara. Ini boleh dilakukan melalui pelbagai cara seperti meningkatkan hasil daripada sumber asli, meningkatkan hasil cukai, dan meningkatkan hasil daripada sektor perkhidmatan.\n",
      "\n",
      "2. Mengurangkan perbelanjaan neg\n"
     ]
    }
   ],
   "source": [
    "# Single-turn chat prompt.\n",
    "messages = [\n",
    "    {'role': 'user', 'content': 'camne nk selesaikan masalah hutang negara'}\n",
    "]\n",
    "\n",
    "# add_generation_prompt=True appends the assistant header to the prompt, so the\n",
    "# model starts answering immediately instead of spending part of the\n",
    "# max_new_tokens budget on generating that header itself.\n",
    "prompt_ids = tokenizer.apply_chat_template(\n",
    "    messages,\n",
    "    add_generation_prompt=True,\n",
    "    return_tensors='pt',\n",
    ").to('cuda')\n",
    "\n",
    "# Greedy decoding. The sampling-only options (temperature / top_p / top_k) are\n",
    "# deliberately not passed: with do_sample=False they are ignored and only\n",
    "# trigger UserWarnings from transformers' generation-config validation.\n",
    "generate_kwargs = dict(\n",
    "    input_ids=prompt_ids,\n",
    "    max_new_tokens=128,\n",
    "    do_sample=False,\n",
    "    repetition_penalty=1.05,\n",
    "    streamer=streamer,\n",
    ")\n",
    "\n",
    "# The streamer prints text as it is produced; the full token tensor is kept in\n",
    "# `generation_output`.\n",
    "with torch.no_grad():\n",
    "    generation_output = ori_model.generate(**generate_kwargs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3.10",
   "language": "python",
   "name": "python3.10"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
